% arXiv preprint 2107.06499 (primary class cs.CL), cited as lee2022deduplicating.
% Authors are stored in the unambiguous "Last, First" form; the style decides rendering.
@misc{lee2022deduplicating,
  author        = {Lee, Katherine and
                   Ippolito, Daphne and
                   Nystrom, Andrew and
                   Zhang, Chiyuan and
                   Eck, Douglas and
                   Callison-Burch, Chris and
                   Carlini, Nicholas},
  title         = {Deduplicating Training Data Makes Language Models Better},
  year          = {2022},
  eprint        = {2107.06499},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
